minor cleaning up.
-/* net_ring.c
+/* network.c
*
- * ring data structures for buffering messages between hypervisor and
- * guestos's. As it stands this is only used for network buffer exchange.
+ * Network virtualization for Xen. Lower-level network interactions are in
+ * net/dev.c and in the drivers. This file contains routines to interact
+ * with the virtual interfaces (vifs) and the virtual firewall/router through
+ * the use of rules.
*
+ * Copyright (c) 2002, A K Warfield and K A Fraser
*/
#include <hypervisor-ifs/network.h>
/* vif globals
* sys_vif_list is a lookup table for vifs, used in packet forwarding.
- * it should be replaced later by something a little more flexible.
+ * it will be replaced later by something a little more flexible.
*/
-int sys_vif_count;
-net_vif_t *sys_vif_list[MAX_SYSTEM_VIFS];
-net_rule_ent_t *net_rule_list;
-kmem_cache_t *net_vif_cache;
+int sys_vif_count; /* global vif count */
+net_vif_t *sys_vif_list[MAX_SYSTEM_VIFS]; /* global vif array */
+net_rule_ent_t *net_rule_list; /* global list of rules */
+kmem_cache_t *net_vif_cache;
kmem_cache_t *net_rule_cache;
-static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED;
-static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED;
+static rwlock_t net_rule_lock = RW_LOCK_UNLOCKED; /* rule mutex */
+static rwlock_t sys_vif_lock = RW_LOCK_UNLOCKED; /* vif mutex */
void print_net_rule_list();
+
+/* ----[ VIF Functions ]----------------------------------------------------*/
+
+/* create_net_vif - Create a new vif and append it to the specified domain.
+ *
+ * The domain is examined to determine how many vifs are currently allocated,
+ * and the newly allocated vif is appended. The vif is also added to the
+ * global list.
+ *
+ */
+
net_vif_t *create_net_vif(int domain)
{
net_vif_t *new_vif;
return new_vif;
}
-/* delete the last vif in the given domain. There doesn't seem to be any reason
- * (yet) to be able to axe an arbitrary vif, by vif id.
+/* destroy_net_vif - Delete the last vif in the given domain.
+ *
+ * There doesn't seem to be any reason (yet) to be able to axe an arbitrary
+ * vif, by vif id.
*/
+
void destroy_net_vif(struct task_struct *p)
{
struct sk_buff *skb;
kmem_cache_free(net_vif_cache, p->net_vif_list[i]);
}
+/* print_vif_list - Print the contents of the global vif table.
+ */
+
void print_vif_list()
{
int i;
}
}
+/* ----[ Net Rule Functions ]-----------------------------------------------*/
+
+/* add_net_rule - Add a new network filter rule.
+ */
+
int add_net_rule(net_rule_t *rule)
{
net_rule_ent_t *new_ent;
return 0;
}
+/* delete_net_rule - Delete an existing network rule.
+ */
+
int delete_net_rule(net_rule_t *rule)
{
net_rule_ent_t *ent = net_rule_list, *prev = NULL;
return 0;
}
-/* add_default_net_rule.
+/* add_default_net_rule - Set up default network path (ie for dom0).
+ *
* this is a utility function to route all traffic with the specified
* ip address to the specified vif. It's used to set up domain zero.
*/
+
void add_default_net_rule(int vif_id, u32 ipaddr)
{
net_rule_t new_rule;
}
+/* print_net_rule - Print a single net rule.
+ */
+
void print_net_rule(net_rule_t *r)
{
printk("===] NET RULE:\n");
printk("=] action : %u\n", r->action);
}
+/* print_net_rule_list - Print the global rule table.
+ */
+
void print_net_rule_list()
{
- net_rule_ent_t *ent = net_rule_list;
+ net_rule_ent_t *ent;
int count = 0;
+ read_lock(&net_rule_lock);
+
+ ent = net_rule_list;
+
while (ent)
{
print_net_rule(&ent->r);
count++;
}
printk("\nTotal of %d rules.\n", count);
+
+ read_unlock(&net_rule_lock);
}
-/* Apply the rules to this skbuff and return the vif id that it is bound for.
- * -1 to drop.
+/* net_find_rule - Find the destination vif according to the current rules.
+ *
+ * Apply the rules to this skbuff and return the vif id that it is bound for.
+ * If there is no match, VIF_DROP is returned.
*/
int net_find_rule(u8 nproto, u8 tproto, u32 src_addr, u32 dst_addr, u16 src_port, u16 dst_port,
return dest;
}
+/* net_get_target_vif - Find the vif that the given sk_buff is bound for.
+ *
+ * This is intended to be the main interface to the VFR rules, where
+ * net_find_rule (above) is a private aspect of the current matching
+ * implementation. All in-hypervisor routing should use this function only
+ * to ensure that this can be rewritten later.
+ *
+ * Currently, network rules are stored in a global linked list. New rules are
+ * added to the front of this list, and (at present) the first matching rule
+ * determines the vif that a packet is sent to. This is obviously not ideal;
+ * it might be more advisable to have chains, or at least most-specific
+ * matching, and moreover routing latency increases linearly (for old rules)
+ * as new rules are added.
+ *
+ * net_get_target_vif examines the sk_buff and pulls out the relevant fields
+ * based on the packet type. It then calls net_find_rule to scan the rule
+ * list.
+ */
+
int net_get_target_vif(struct sk_buff *skb)
{
int target = VIF_DROP;
return VIF_DROP;
}
+/* ----[ Syscall Interface ]------------------------------------------------*/
+
/*
* This is the hook function to handle guest-invoked traps requesting
* changes to the network system.
#define TRAP_INSTR "int $0x82"
-static inline int HYPERVISOR_set_trap_table(trap_info_t *table)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_trap_table),
- "b" (table) );
-
- return ret;
-}
-
-
-static inline int HYPERVISOR_pt_update(page_update_request_t *req, int count)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_pt_update),
- "b" (req), "c" (count) );
-
- return ret;
-}
-
-
-static inline int HYPERVISOR_console_write(const char *str, int count)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_console_write),
- "b" (str), "c" (count) );
-
-
- return ret;
-}
-
-static inline int HYPERVISOR_set_pagetable(unsigned long ptr)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_pagetable),
- "b" (ptr) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_set_guest_stack(
- unsigned long ss, unsigned long esp)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_set_guest_stack),
- "b" (ss), "c" (esp) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_net_update(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_net_update) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_fpu_taskswitch(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_fpu_taskswitch) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_yield(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_yield) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_exit(void)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_exit) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_dom0_op(void *dom0_op)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_dom0_op),
- "b" (dom0_op) );
-
- return ret;
-}
-
-static inline int HYPERVISOR_network_op(void *network_op)
-{
- int ret;
- __asm__ __volatile__ (
- TRAP_INSTR
- : "=a" (ret) : "0" (__HYPERVISOR_network_op),
- "b" (network_op) );
-
- return ret;
-}
-
/* Event message note:
*
* Here, as in the interrupts to the guestos, additional network interfaces
/* vif.h
*
- * this is the hypervisor end of the network code. The net_ring structure
+ * This is the hypervisor end of the network code. The net_ring structure
* stored in each vif is placed on a shared page to interact with the guest VM.
+ *
+ * Copyright (c) 2002, A K Warfield and K A Fraser
*/
/* virtual network interface struct and associated defines. */
/* VIF-related defines. */
#define MAX_GUEST_VIFS 2 // each VIF is a small overhead in task_struct
-#define MAX_SYSTEM_VIFS 256 // trying to avoid dynamic allocation
+#define MAX_SYSTEM_VIFS 256
/* vif globals */
extern int sys_vif_count;
unsigned long cpu_mask;
#endif
struct task_struct *p;
-// unsigned int dest_dom;
int this_cpu = smp_processor_id();
struct softnet_data *queue;
unsigned long flags;
local_irq_save(flags);
netdev_rx_stat[this_cpu].total++;
-/*
- skb->h.raw = skb->nh.raw = skb->data;
-
- if ( skb->len < 2 ) goto drop;
- switch ( ntohs(skb->mac.ethernet->h_proto) )
- {
- case ETH_P_ARP:
- if ( skb->len < 28 ) goto drop;
- dest_dom = ntohl(*(unsigned long *)
- (skb->nh.raw + 24));
- break;
- case ETH_P_IP:
- if ( skb->len < 20 ) goto drop;
- dest_dom = ntohl(*(unsigned long *)
- (skb->nh.raw + 16));
- break;
- default:
- goto drop;
- }
-
- if ( (dest_dom < opt_ipbase) ||
- (dest_dom > (opt_ipbase + 16)) )
- goto drop;
-
- dest_dom -= opt_ipbase;
-
- read_lock(&tasklist_lock);
- p = &idle0_task;
- do {
- if ( p->domain != dest_dom ) continue;
- skb_queue_tail(&p->net_vif_list[0]->skb_list, skb); // vfr will fix.
- cpu_mask = mark_hyp_event(p, _HYP_EVENT_NET_RX);
- read_unlock(&tasklist_lock);
- goto found;
- }
- while ( (p = p->next_task) != &idle0_task );
- read_unlock(&tasklist_lock);
- goto drop;
-*/
+
if (skb->src_vif == VIF_UNKNOWN_INTERFACE)
skb->src_vif = VIF_PHYSICAL_INTERFACE;
if (skb->dst_vif == VIF_UNKNOWN_INTERFACE)
net_get_target_vif(skb);
-if (skb->dst_vif > 1)
-printk("netifrx got packet bound for system vif %d.\n", skb->dst_vif);
+
if (sys_vif_list[skb->dst_vif] == NULL)
{
// the target vif does not exist.
goto drop;
}
+ /* This lock-and-walk of the task list isn't really necessary, and is an
+ * artifact of the old code. The vif contains a pointer to the skb list
+ * we are going to queue the packet in, so the lock and the inner loop
+ * could be removed.
+ *
+ * The argument against this is a possible race in which a domain is killed
+ * as packets are being delivered to it. This would result in the dest vif
+ * vanishing before we can deliver to it.
+ */
+
if ( skb->dst_vif >= VIF_PHYSICAL_INTERFACE )
{
read_lock(&tasklist_lock);
read_unlock(&tasklist_lock);
goto drop;
}
-// found:
-#if 0
- __skb_queue_tail(&queue->input_pkt_queue,skb);
- /* Runs from irqs or BH's, no need to wake BH */
- cpu_raise_softirq(this_cpu, NET_RX_SOFTIRQ);
- local_irq_restore(flags);
- get_sample_stats(this_cpu);
- return softnet_data[this_cpu].cng_level;
-//#else
- hyp_event_notify(cpu_mask);
- local_irq_restore(flags);
- return 0;
-#endif
drop:
netdev_rx_stat[this_cpu].dropped++;
while ( (skb = skb_dequeue(¤t->net_vif_list[nvif]->skb_list))
!= NULL )
{
-if (nvif > 0)
-printk("flushrxqueue on vif %d (sys: %d) (pkt_type=%d)\n", nvif, current->net_vif_list[nvif]->id, skb->pkt_type);
/*
* Write the virtual MAC address into the destination field
* of the ethernet packet. Furthermore, do the same for ARP
* is always 00-00-00-00-00-00.
*
* Actually, the MAC address is now all zeros, except for the
- * first sixteen bits, which are the per-host vif id.
- * (so eth0 should be 00-00-..., eth1 is 01-00-...)
+ * per-host vif id, which is written starting at the second byte.
+ * (so eth0 should be 00-00-..., eth1 is 00-01-...)
*/
memset(skb->mac.ethernet->h_dest, 0, ETH_ALEN);
- *(unsigned int *)(skb->mac.ethernet->h_dest) = nvif;
+ *(unsigned int *)(skb->mac.ethernet->h_dest + 1) = nvif;
if ( ntohs(skb->mac.ethernet->h_proto) == ETH_P_ARP )
{
memset(skb->nh.raw + 18, 0, ETH_ALEN);
- *(unsigned int *)(skb->nh.raw + 18) = nvif;
+ *(unsigned int *)(skb->nh.raw + 18 + 1) = nvif;
}
i = net_ring->rx_cons;
net_ring = current_vif->net_ring;
for ( i = net_ring->tx_cons; i != net_ring->tx_prod; i = TX_RING_INC(i) )
{
-if (j > 0)
-printk("net_update called with packet on vif %d system: %d)\n", j, current_vif->id);
if ( copy_from_user(&tx, net_ring->tx_ring+i, sizeof(tx)) )
continue;
net_get_target_vif(skb);
if ( skb->dst_vif > VIF_PHYSICAL_INTERFACE )
{
-if (j > 0)
- printk("Sent to netif_rx.\n");
if (netif_rx(skb) == 0)
/* Give up non-local reference. Packet delivered locally. */
kfree_skb(skb);
}
else if ( skb->dst_vif == VIF_PHYSICAL_INTERFACE )
{
-if (j > 0)
- printk("Sent to physical device.\n");
skb_push(skb, skb->dev->hard_header_len);
dev_queue_xmit(skb);
}
else
{
-if (j > 0)
- printk("dropped.\n");
kfree_skb(skb);
}
- /*
- skb_get(skb);
- skb->protocol = eth_type_trans(skb, skb->dev);
- if ( netif_rx(skb) == 0 )
- {
- kfree_skb(skb);
- }
- else
- {
- skb_push(skb, skb->dev->hard_header_len);
- dev_queue_xmit(skb);
- }
- */
}
}
net_ring->tx_cons = i;
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
-#include <linux/ip.h> //remove this.
#include <net/sock.h>
unsigned int i;
struct net_private *np = (struct net_private *)dev->priv;
-if ((np->id > 0) || ((skb->len > 20)
- && (skb->nh.iph != NULL)
- && (skb->nh.iph->protocol == 1)))
- printk(KERN_WARNING "TX on vif %d (dev:%p)\n", np->id, dev);
-
if ( np->tx_full )
{
printk(KERN_WARNING "%s: full queue wasn't stopped!\n", dev->name);
skb->protocol = eth_type_trans(skb, dev);
np->stats.rx_packets++;
np->stats.rx_bytes += np->net_ring->rx_ring[i].size;
-
-if (((skb->len > 20)
- && ((*(unsigned char *)(skb->data + 9) == 1) || (np->id > 0)) ))
- printk(KERN_WARNING "RX on vif %d (dev:%p)\n", np->id, dev);
-if ((skb != NULL) && (skb->data != NULL) && (skb->len > 20) && ntohl(*(unsigned long *)(skb->data + 16)) == 167903489)
- printk(KERN_WARNING "RX INT (driver): pkt_type is %d.!", skb->pkt_type);
-
netif_rx(skb);
dev->last_rx = jiffies;
-
-
}
np->rx_idx = i;
/* Deal with hypervisor racing our resetting of rx_event. */
smp_mb();
if ( np->net_ring->rx_cons != i ) {
-//printk("redoing network rx...\n");
goto again;
}
}
dev->get_stats = network_get_stats;
memset(dev->dev_addr, 0, ETH_ALEN);
- *(unsigned int *)(dev->dev_addr) = i;
+ *(unsigned int *)(dev->dev_addr + 1) = i;
if ( (err = register_netdev(dev)) != 0 )
{
np->dev = dev;
np->id = i;
list_add(&np->list, &dev_list);
-printk(KERN_WARNING "Added VIF, ifindex is %d.\n", dev->ifindex);
}
return 0;